# Tutorial 6: Application on Slide-seq mouse cerebellum dataset
In this vignette, we run `PROST` on the processed mouse cerebellum dataset from [(Rodriques et al. 2019)](https://doi.org/10.1126/science.aaw1219) to evaluate its computational efficiency.  
The [original data](https://singlecell.broadinstitute.org/single_cell/study/SCP795/) can be downloaded from [Google Drive](https://drive.google.com/drive/folders/1chkWzG0Y4YkO6ys-LLntXfXC3WDTk8Pm?usp=drive_link). 

---

### 1.Load PROST and its dependent packages

    import numpy as np
    import scanpy as sc
    import os
    import pandas as pd
    import warnings
    warnings.filterwarnings("ignore")
    import matplotlib.pyplot as plt
    import sys
    from sklearn import metrics
    import PROST
    PROST.__version__


    >>> ' 1.1.2 '


### 2.Set up the working environment and import data 

    # Location of R inside the PROST environment (used for the mclust clustering)
    ENVpath = "your path of PROST_ENV"            # refer to 'How to use PROST' section
    os.environ['R_HOME'] = f'{ENVpath}/lib/R'
    os.environ['R_USER'] = f'{ENVpath}/lib/python3.7/site-packages/rpy2'

    # Initialise the random seed through PROST.setup_seed
    SEED = 818
    PROST.setup_seed(SEED)

    # Set directory (If you want to use additional data, please change the file path)
    rootdir = 'datasets/Slide-seq/'

    input_dir = os.path.join(rootdir)
    output_dir = os.path.join(rootdir, 'results/')
    # exist_ok=True creates the folder only when it is missing,
    # so no separate os.path.isdir check is needed
    os.makedirs(output_dir, exist_ok=True)

    # Read data
    # os.path.join inserts a separator only when input_dir does not already end with one
    adata = sc.read(os.path.join(input_dir, "used_data.h5"))


### 3.Plot annotation

    # Plot the "annotation" labels on the spatial coordinates
    plt.rcParams["figure.figsize"] = (4,4)
    # show=False: do not display immediately, so the figure can be adjusted and saved below
    sc.pl.embedding(adata, basis="spatial", color="annotation",size = 8, show=False, title='annotation')
    plt.axis('off')
    # Build the path with os.path.join so it does not rely on output_dir ending in "/"
    plt.savefig(os.path.join(output_dir, "annotation.png"), dpi=600, bbox_inches='tight')


![slide_seq_annotation](./_images/slide-seq/slide_seq_annotation.png "Plot annotation")


### 4.Calculate PI
    
    # Calculate PI (PROST Index) for each gene
    adata = PROST.prepare_for_PI(adata, percentage = 0.01, platform="Slide-seq")
    adata = PROST.cal_PI(adata, platform="Slide-seq")

    # Save PI
    # os.path.join avoids the doubled separator produced by output_dir + "/..."
    adata.write_h5ad(os.path.join(output_dir, "PI_result.h5"))

    # Draw SVGs detected by PI: the top 25 genes sorted by PI, on a 5 x 5 sheet
    PROST.plot_gene(adata, platform="Slide-seq", sorted_by='PI', size = 0.3, top_n = 25, ncols_each_sheet = 5, nrows_each_sheet = 5,save_path = output_dir)
      
    
    >>> Filtering genes ...
    >>> Trying to set attribute `.var` of view, copying.
    >>> Normalization to each gene:
    >>> 100%|██████████████████████| 2193/2193 [00:00<00:00, 3513.47it/s]
    >>> Gaussian filtering for each gene:
    >>> 100%|██████████████████████| 2193/2193 [06:08<00:00,  5.96it/s]
    >>> Binary segmentation for each gene:
    >>> 100%|██████████████████████| 2193/2193 [00:07<00:00, 305.77it/s]
    >>> Spliting subregions for each gene:
    >>> 100%|██████████████████████| 2193/2193 [00:24<00:00, 88.86it/s]
    >>> Computing PROST Index for each gene:
    >>> 100%|██████████████████████| 2193/2193 [06:15<00:00,  5.83it/s]
    >>> PROST Index calculation completed !!
    >>> Drawing pictures:
    >>> 100%|██████████████████████| 1/1 [00:19<00:00, 19.73s/it]
    >>> Drawing completed !!


![slide_seq_PI](./_images/slide-seq/slide_seq_PI.png "Plot PI")

--- 

## Clustering on original data

    # Set the number of clusters (passed to PROST.run_PNN in the clustering step)
    n_clusters = 8     # same as annotation

### 1.Expression data preprocessing

    # Re-apply the seed before the clustering workflow
    PROST.setup_seed(SEED)
    # Read data (reloads the input file, replacing the adata object used for the PI step)
    # os.path.join keeps this path consistent with the first read of used_data.h5
    adata = sc.read(os.path.join(input_dir, "used_data.h5"))

    # Normalize the total counts, then apply a log1p transform
    sc.pp.normalize_total(adata)
    sc.pp.log1p(adata)


### 2.Run PROST clustering
    # Run the PROST clustering on the preprocessed data.
    # The logged output shows an adjacency matrix calculation, PCA, Laplacian
    # smoothing and an mclust initialisation of the cluster centers before the epochs run.
    # The later steps read the results from adata.obs["clustering"] and adata.obsm["PROST"].
    # NOTE(review): adj_mode/min_distance presumably select a distance-thresholded
    # adjacency, and tol/max_epochs presumably control stopping -- confirm against the PROST docs.
    PROST.run_PNN(adata, 
            adj_mode = "distance",
            min_distance = 80,
            init="mclust",
            n_clusters = n_clusters,                   
            tol = 5e-3,
            SEED=SEED,
            max_epochs = 25)


    >>> Calculating adjacency matrix ...
    >>> Running PCA ...
    >>> Laplacian Smoothing ...
    >>> Initializing cluster centers with mclust, n_clusters known
    >>> Epoch: : 27it [19:21, 43.02s/it, loss=0.1746949]             
    >>> Clustering completed !!


### 3.Calculate ARI

    # Adjusted Rand index between the reference annotation and the PROST cluster labels
    labels_true = adata.obs["annotation"]
    labels_pred = adata.obs["clustering"]
    ARI = metrics.adjusted_rand_score(labels_true, labels_pred)
    print("pp_clustering_ARI =", ARI)


    >>> pp_clustering_ARI = 0.3332276319944897


### 5.Save clustering result

    # Paths are built with os.path.join to avoid the doubled separator from output_dir + "/..."
    # Cluster labels, written without a header line
    adata.obs["clustering"].to_csv(os.path.join(output_dir, "clustering.csv"), header = False)
    # The "PROST" entry of adata.obsm, written as plain text
    np.savetxt(os.path.join(output_dir, "embedding.txt"), adata.obsm["PROST"])
    # The full AnnData object, reloaded by the plotting step
    adata.write_h5ad(os.path.join(output_dir, "PNN_result.h5"))


### 6.Plot clustering result

    # Reload the saved clustering result
    # (os.path.join avoids the doubled separator from output_dir + "/...")
    adata = sc.read(os.path.join(output_dir, "PNN_result.h5"))
    plt.rcParams["figure.figsize"] = (5,5)
    # show=False: do not display immediately, so the figure can be adjusted and saved below
    sc.pl.embedding(adata, basis="spatial", color="clustering", size = 10,  show=False, title='clustering')
    plt.axis('off')
    plt.savefig(os.path.join(output_dir, "clustering.png"), dpi=600, bbox_inches='tight')


![slide_seq_clustering](./_images/slide-seq/slide_seq_clustering.png "slide_seq_clustering")

---